Load the libraries and the data set.

# Install stringr only when it is missing. The package name must be a quoted
# string: a bare `stringr` is looked up as an R object and fails with
# "object 'stringr' not found".
if (!requireNamespace("stringr", quietly = TRUE)) {
  install.packages("stringr")
}

# Attach the packages used below. tidyverse already attaches dplyr and
# stringr; the explicit calls are kept so the dependencies stay visible.
library(tidyverse)
library(dplyr)
library(stringr)

# Read the books data and print a preview; the result is not stored here.
read_csv("data/books.csv")

Rename data set for research

Initial reading of data set

# Dimensions of the data set, reported as rows then columns.
dim(books_info)
[1] 11131    12
# Number of rows (one per record in the data set).
nrow(books_info)
[1] 11131
# Number of columns.
ncol(books_info)
[1] 12
# Names of the columns available for analysis.
names(books_info)
 [1] "bookID"             "title"              "authors"           
 [4] "average_rating"     "isbn"               "isbn13"            
 [7] "language_code"      "num_pages"          "ratings_count"     
[10] "text_reviews_count" "publication_date"   "publisher"         

Find missing values

# Count the missing values in every column. `across()` with a lambda replaces
# the deprecated `funs()` helper, and the no-op `select(everything())` step is
# dropped; the result is still one row with one count per original column.
books_info %>%
  summarise(across(everything(), ~ sum(is.na(.x))))
`funs()` is deprecated as of dplyr 0.8.0.
Please use a list of either functions or lambdas: 

  # Simple named list: 
  list(mean = mean, median = median)

  # Auto named with `tibble::lst()`: 
  tibble::lst(mean, median)

  # Using lambdas
  list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
This warning is displayed once every 8 hours.
Call `lifecycle::last_warnings()` to see where this warning was generated.

Replace/remove missing values

Review/investigate data set

View the data set by looking only at authors, average rating, ratings count and publisher.

Arrange by average rating in descending order.

The data above shows that arranging by average rating is misleading: some of these results have been rated only a few times and therefore have a higher average rating.

Let's arrange the same data set by ratings count, so that the average rating can be compared with the number of ratings to get a more reliable picture.

# Order the rated books from the most-rated to the least-rated and print them.
top_rated %>%
  arrange(desc(ratings_count)) %>%
  print()

The data above lets us see the ratings count in descending order alongside the average rating.

Keep only the results by the author J.K. Rowling/Mary GrandPré.

Organise the above data by ratings count in descending order.

# Keep only the columns needed for the rating comparison.
jkrowling_books <- select(
  jkrowling,
  authors, average_rating, ratings_count, publication_date
)

# Display the titles from the most-rated to the least-rated.
jkrowling_books %>%
  arrange(desc(ratings_count))

Arrange by average rating

# Show the J.K. Rowling titles ordered from highest to lowest average rating.
# The selection and the sort are chained into one pipeline: as separate
# statements the selected columns were printed unsorted and then discarded,
# and the sort ran on the unselected table.
jkrowling_books %>%
  select(authors, average_rating, ratings_count) %>%
  arrange(desc(average_rating))

Now let's look only at the data with 2,000,000 or more ratings.

# Reduce the table to the two rating columns, then keep only the titles with
# at least two million ratings.
jk_highest_rated <- filter(
  select(jkrowling_books, average_rating, ratings_count),
  ratings_count >= 2e6
)
jk_highest_rated
LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKTG9hZCBpbiBsaWJyYXJ5cyAmIGRhdGEgc2V0LgoKCmBgYHtyfQoKYGBgCgpgYGB7cn0KaW5zdGFsbC5wYWNrYWdlcyhzdHJpbmdyKQpsaWJyYXJ5KHRpZHl2ZXJzZSkKbGlicmFyeShkcGx5cikKbGlicmFyeShzdHJpbmdyKQpyZWFkX2NzdigiZGF0YS9ib29rcy5jc3YiKQoKYGBgCgpSZW5hbWUgZGF0YSBzZXQgZm9yIHJlc2VhcmNoCgpgYGB7cn0KYm9va3NfaW5mbyA8LSByZWFkLmNzdigiZGF0YS9ib29rcy5jc3YiKQoKYm9va3NfaW5mbwpgYGAKCgpJbml0aWFsIHJlYWRpbmcgb2YgZGF0YSBzZXQKCmBgYHtyfQpkaW0oYm9va3NfaW5mbykKYGBgCgoKYGBge3J9Cm5yb3coYm9va3NfaW5mbykKYGBgCgpgYGB7cn0KbmNvbChib29rc19pbmZvKQpgYGAKCmBgYHtyfQpuYW1lcyhib29rc19pbmZvKQpgYGAKCkZpbmQgbWlzc2luZyB2YWx1ZXMgCgpgYGB7cn0KYm9va3NfaW5mbyAlPiUKICBzZWxlY3QoZXZlcnl0aGluZygpKSAlPiUKICBzdW1tYXJpc2VfYWxsKGZ1bnMoc3VtKGlzLm5hKC4pKSkpCmBgYAoKUmVwbGFjZS9yZW1vdmUgbWlzc2luZyB2YWx1ZXMKCmBgYHtyfQpib29rc19pbmZvMiA8LSBib29rc19pbmZvICU+JSAKICBkcm9wX25hKHJhdGluZ3NfY291bnQsIHRleHRfcmV2aWV3c19jb3VudCkKYm9va3NfaW5mbzIKYGBgCgpSZXZpZXcvaW52ZXN0aWdhdGUgZGF0YSBzZXQKClZpZXcgdGhlIGRhdGEgc2V0IGJ5IG9ubHkgbG9va2luZyBhdCBhdXRob3IsYXZlcmFnZSByYXRpbmcsIHJhdGluZyBjb3VudCBhbmQgcHVibGlzaGVyLgoKQXJyYW5nZSBvcmRlciBieSBhdmVyYWdlIHJhdGluZyBpbiBkZXNjCgpgYGB7cn0KdG9wX3JhdGVkIDwtIGJvb2tzX2luZm8yICU+JSAKICBzZWxlY3QoYXV0aG9ycywgYXZlcmFnZV9yYXRpbmcsIHJhdGluZ3NfY291bnQpCiAgYXJyYW5nZSh0b3BfcmF0ZWQsIGRlc2MoYXZlcmFnZV9yYXRpbmcpKQp0b3BfcmF0ZWQKYGBgClRoZSBhYm92ZSBkYXRhIHNob3dzIHRoYXQgYXJyYW5naW5mIGJ5IHRoZSBhdmVyYWdlIHJhdGluZyBpcyBtaXNzbGVhZGluZyBhcyBzb21lIG9mIHRoZXNlIHJlc3VsdHMgaGF2ZSBiZWVuIHJhdGVkIHZlcnkgZmV3IHRpbWVzIHRoZXJlZm9yZSBoYXZlIGEgaGlnaGVyIGF2ZXJhZ2UgcmF0aW5nLgoKTGV0cyBhcnJhbmdlIHRoZSBzYW1lIGRhdGEgc2V0IGJ5IHJhdGluZ3MgY291bnQgdG8gY29tcGFyZSB0aGUgYXZlcmFnZSByYXRpbmcgdG8gdGhlIG51bWJlciBvZiByYXRpbmdzIHRvIGdldCBhIGJldHRlciB2YWx1ZS4KCmBgYHtyfQphcnJhbmdlKHRvcF9yYXRlZCwgZGVzYyhyYXRpbmdzX2NvdW50KSkgJT4lIAogIHByaW50CmBgYApUaGUgZGF0YSBhYm92ZSBsZXRzIHVzIHNlZSB0aGUgYXZlcmFnZSByYXRpbmcgaW4gZGVzYyBvcmRlciBjb21wYXJlZCB0byB0aGUgYXZlcmFnZSByYXRpbmcuCgoKRmlsdGVyIG9ubHkgcmVzdWx0cyBieSBhdXRob3Igai5rIHJvd2xpbmcvbWFy
eSBncmFuZHByZQoKYGBge3J9Cmprcm93bGluZyA8LSBmaWx0ZXIoYm9va3NfaW5mbzIsIGF1dGhvcnMgPT0gIkouSy4gUm93bGluZy9NYXJ5IEdyYW5kUHLDqSIpCgpqa3Jvd2xpbmcKYGBgCgpPcmdhbmlzZSBhYm92ZSBkYXRlIGJ5IHJhdGluZyBjb3VudCBpbiBkZXNjIG9yZGVyCgpgYGB7cn0Kamtyb3dsaW5nX2Jvb2tzIDwtamtyb3dsaW5nICU+JSAKICBzZWxlY3QoYXV0aG9ycywgYXZlcmFnZV9yYXRpbmcsIHJhdGluZ3NfY291bnQpCmFycmFuZ2Uoamtyb3dsaW5nX2Jvb2tzLCBkZXNjKHJhdGluZ3NfY291bnQpKQpgYGAKCkFycmFuZ2UgYnkgYXZlcmFnZSByYXRpbmcgCgpgYGB7cn0Kamtyb3dsaW5nX2Jvb2tzICU+JSAKICBzZWxlY3QoYXV0aG9ycywgYXZlcmFnZV9yYXRpbmcsIHJhdGluZ3NfY291bnQpCmFycmFuZ2Uoamtyb3dsaW5nX2Jvb2tzLCBkZXNjKGF2ZXJhZ2VfcmF0aW5nKSkKYGBgCgpOb3cgbGV0cyBsb29rIGF0IHRoZSBkYXRhIHRoYXQgb25seSBoYXMgb3ZlciAyMDAwMDAwMCBvciBhYm92ZSByYXRpbmdzCgpgYGB7cn0KamtfaGlnaGVzdF9yYXRlZCA8LSBqa3Jvd2xpbmdfYm9va3MgJT4lIAogIHNlbGVjdChhdmVyYWdlX3JhdGluZywgcmF0aW5nc19jb3VudCkgJT4lIAogIGZpbHRlcihyYXRpbmdzX2NvdW50ID49IDIwMDAwMDApCmprX2hpZ2hlc3RfcmF0ZWQKYGBgCgo=